// Figure 2: mean DMV-match / no-DMV-match rates for two groups --
//   (1) records flagged pv == 1 below (name-matched to the voter file and
//       pv_status == "NOT COUNTED"), labelled
//       "Provisional Ballot Rejected for Lack of ID", and
//   (2) the registrants appended from nc_dataset.dta, labelled "All Registrants".
//
// Inputs  (under $path): didnt_have_id.dta, voter_file.dta,
//                        no_dmv_match.dta, nc_dataset.dta
// Outputs (under $path): figure2.log, figure2.pdf
global path "~/Dropbox/voter_id/Replication"

// Close any log left open by an earlier (possibly failed) run.
capture log close
log using "$path/figure2.log", replace

use "$path/didnt_have_id.dta", clear

// The voter file keys on "county", so align the name before merging.
rename county_id county

// Attach voter-file fields to each record.
// m:1 instead of m:m: an m:m merge pairs observations within duplicate-key
// groups by their position in sort order, which is not a meaningful join.
// m:1 requires (county, voter_reg_num) to identify a single voter-file row
// and stops with an error otherwise, so duplicate keys in the voter file are
// surfaced instead of being silently mis-paired. When the voter file is
// unique on these keys the result is the same as before.
merge m:1 county voter_reg_num using "$path/voter_file.dta", keep(1 3) nogen

// Merge in the list of registrants with no DMV match; anyone not on that
// list (missing after the merge) is coded as 0.
merge m:1 ncid using "$path/no_dmv_match.dta", keep(1 3) nogen
replace no_dmv_match = 0 if no_dmv_match == .

keep last_name first_name pv_name pv_status no_dmv_match

// vf_name is placed next to pv_name for side-by-side inspection only;
// it is not used in any computation below.
generate vf_name = last_name + "," + first_name
order vf_name, after(pv_name)

// pv_name is comma-separated: the first piece is compared to last_name and
// the first word of the second piece is compared to first_name.
split pv_name, parse(",")
rename pv_name1 pv_last_name
generate last_match = pv_last_name == last_name
generate pv_first_name = word(pv_name2, 1)
generate first_match = pv_first_name == first_name

// A record counts as matched when EITHER the last or the first name agrees.
generate match = last_match | first_match

bysort pv_status: tabulate no_dmv_match if match

// pv == 1: name-matched record whose pv_status is "NOT COUNTED".
generate pv = match & pv_status == "NOT COUNTED"

generate dmv_match = no_dmv_match == 0
keep dmv_match no_dmv_match pv

// Only pv == 1 records are kept, so every remaining row gets the label.
generate pv_text = ""
drop if pv == 0
replace pv_text = "Provisional Ballot Rejected for Lack of ID" if pv == 1

// Append the dataset used for the descriptive stats so that the no_dmv_match
// rate is the same as in the descriptive stats table.
// Appended rows have pv, dmv_match and pv_text missing/empty at this point;
// the pv == 1 rows above have id and the other appended variables missing.
append using "$path/nc_dataset.dta", keep(no_dmv_match birth_year ///
	hispanic black white othernw id new_reg voted)

// Drop rows with new_reg == 1 (pv rows have new_reg missing and are kept).
drop if new_reg == 1
drop new_reg

// in_sample == 1 when all six listed variables are non-missing.
egen non_miss = rownonmiss(hispanic black white othernw birth_year voted)
generate in_sample = non_miss == 6
drop hispanic black white othernw birth_year voted

// Apply the sample restriction to appended rows only (pv still missing).
drop if in_sample == 0 & pv == .
replace pv = 0 if pv == .

// pv == 1 rows carry no id from nc_dataset; give each its row number so the
// de-duplication on (id, pv) below cannot collapse them into one group.
replace id = _n if pv == 1

// Fill dmv_match for the appended rows from no_dmv_match.
replace dmv_match = 1 if no_dmv_match == 0
replace dmv_match = 0 if no_dmv_match == 1
drop non_miss in_sample

// Keep one row per (id, pv).
// NOTE(review): the order within an (id, pv) group is not fixed, so if
// duplicate ids ever disagree on no_dmv_match the retained row depends on
// sort tie-breaking -- confirm duplicates are identical on that variable.
bysort id pv: keep if _n == 1
drop id
compress
replace pv_text = "All Registrants" if pv_text == ""

// One group of bars per pv_text label, showing the mean of each indicator.
graph bar (mean) dmv_match no_dmv_match, over(pv_text, label(labsize(medium))) blabel(total, format(%9.3f)) ///
	legend(off)

graph export "$path/figure2.pdf", replace

log close
